*** How worried should we be? The implications of fabricated survey data for political science
*** Table A12: Item-Level Effects of Fabricated Data in Peru

* Session setup, data load, and the clean-interview indicator.
set more off

* point Stata at the folder that holds the dataset (edit the path on the next line)
cd "C:\~\Downloads\"

use "QAC_2017_12.21.17.dta", clear

* restrict to country code 11 (presumably Peru, per the table title)
drop if pais != 11

* clean_data is 0 for any interview that was cancelled, flagged as fraud, or has
* ls3 == 999999; it is 1 for the remaining non-cancelled interviews; otherwise missing
gen clean_data = cond(cancelled == 1 | ls3 == 999999 | fraud == 1, 0, cond(cancelled == 0, 1, .))

* interviews with ls3 == 999999 are not counted as fraud from here on
replace fraud = 0 if ls3 == 999999

** Demographic recodes: age cohort, survey/upload timestamps, education level.

* q2 with the non-response codes set to missing
gen birth_yr = q2
replace birth_yr = . if inlist(birth_yr, 888888, 988888, 999999)

* interview date and year parsed from the string variable date
gen svy_date = date(date, "MDYhm")
format svy_date %td
gen year = year(svy_date)

* upload timestamp and hour of day parsed from the string variable upload
gen upload_time = Clock(upload, "MDYhm")
format upload_time %tC
gen upload_hour = hhC(upload_time)

* NOTE(review): age_yrs is copied from q2 without using the survey year, so q2
* presumably already holds age in years rather than birth year - confirm
gen age_yrs = birth_yr
gen edad = 1 if age_yrs >= 18 & age_yrs <=25
replace edad = 2 if age_yrs >= 26 & age_yrs <=35
replace edad = 3 if age_yrs >= 36 & age_yrs <=45
replace edad = 4 if age_yrs >= 46 & age_yrs <=55
replace edad = 5 if age_yrs >= 56 & age_yrs <=65
* missing compares as larger than any number in Stata, so exclude it explicitly;
* otherwise respondents with unknown age would be placed in the 66+ cohort
replace edad = 6 if age_yrs >= 66 & !missing(age_yrs)
label var edad "Age Cohort"
label define agegroup 1 "18-25 years old" 2 "26-35 years old" 3 "36-45 years old" 4 "46-55 years old" 5 "56-65 years old" 6 "66+ years old" 
label values edad agegroup

* education level (three bands built from years of education in ed)
gen ed_level = 1 if inlist(ed, 0, 1, 2, 3, 4, 5, 6)
replace ed_level = 2 if inlist(ed, 7, 8, 9, 10, 11)
replace ed_level = 3 if inlist(ed, 12, 13, 14, 15, 16, 17, 18)
label var ed_level "Education Level"
label define edlevel 1 "None or Primary Ed." 2 "Secondary Ed." 3 "University+ Ed."
label values ed_level edlevel
label var ed "Years of Education"
* non-response codes in ed become missing only after ed_level has been built
replace ed = . if inlist(ed, 888888, 988888, 999999)

** Matching of fraud-flagged interviews to non-fraud interviews
* fix the random-number seed before cem is called
set seed 339487731

* indicator for interviews whose status is "Canceled" but that are not flagged as fraud
gen canceled_nonfraud = (status == "Canceled" & fraud == 0)

* those interviews are removed before matching
keep if canceled_nonfraud == 0

* exact match on upm1a, q1 and agequota, with fraud as the treatment and the k2k option
cem upm1a (#0) q1 (#0) agequota (#0) if canceled_nonfraud == 0, tr(fraud) k2k

* Combine the two versions of each split item into the base variable:
* whenever one version holds 999999, the base variable takes the other version.
foreach stem in dst1b env2b drk1 env1c mil10un mil10a mil10e {
	replace `stem' = `stem'1 if `stem'2 == 999999 & pais != 22
	replace `stem' = `stem'2 if `stem'1 == 999999 & pais != 22
}
* mil10oas follows the same pattern but excludes country code 7 instead of 22,
* and for country code 22 it is taken from mil10oa
replace mil10oas = mil10oas1 if mil10oas2 == 999999 & pais != 7
replace mil10oas = mil10oas2 if mil10oas1 == 999999 & pais != 7
replace mil10oas = mil10oa if pais == 22

* Item-level comparisons among matched interviews (cem_matched == 1), by fraud.
* Four result files are written, one row per item:
*   questions_nr.dta    share of non-response per group and the prtest z statistic
*   questions_mean.dta  Welch t-test on the item rescaled to 0-100
*   questions_sd.dta    variance-ratio test on the rescaled item
*   questions_sc.dta    maximum observed value of the item

* The item lists are defined once so that the loops cannot drift apart.
* nr_items is the full list minus cp20, ccq2 and ccq4
* (cp20, ccq2, ccq4 do not have any non-response among matched interviews)
local nr_items ls3 soct2 idio2 sgl1 cp6 cp7 cp8 cp13 it1 l1 aoj11 aoj12 ///
b1 b2 b3 b4 b6 b43 b12 b13 b18 b21 b21a b32 b37 b47a m1 m2 sd2new2 sd3new2 sd6new2 ///
infrax infra3 ros1 ros4 ing4 eff1 eff2 media3 media4 pn4 e5 e15 d1 d2 d3 d4 d5 d6 ///
lib1 lib2b lib2c lib4 exc7new pol1 q5a q5b q10e www1 gi0 e16 aoj22new ///
ivv3 exc7 ie3 ie6 ie9 ie10 envp8
local items ls3 soct2 idio2 sgl1 cp6 cp7 cp8 cp13 cp20 it1 l1 aoj11 aoj12 ///
b1 b2 b3 b4 b6 b43 b12 b13 b18 b21 b21a b32 b37 b47a m1 m2 sd2new2 sd3new2 sd6new2 ///
infrax infra3 ros1 ros4 ing4 eff1 eff2 media3 media4 pn4 e5 e15 d1 d2 d3 d4 d5 d6 ///
lib1 lib2b lib2c lib4 exc7new pol1 ccq2 ccq4 q5a q5b q10e www1 gi0 e16 aoj22new ///
ivv3 exc7 ie3 ie6 ie9 ie10 envp8

* a handle left open by an earlier aborted run would make postfile fail
capture postclose qdata

* non-response: 888888, 988888 and 99 count as non-response; 999999 is excluded
postfile qdata str32 question nonresponse_clean nonresponse_fraud nr_diff diff_nr_zstat using "questions_nr.dta", replace
foreach x of varlist `nr_items' {
	di "`x'"
	gen `x'_nr = 1 if inlist(`x', 888888, 988888, 99)
	replace `x'_nr = 0 if `x'_nr == . & `x' != 999999
	replace `x'_nr = . if `x' == 999999
	prtest `x'_nr if cem_matched == 1, by(fraud)
	post qdata ("`x'") (r(P_1)) (r(P_2)) (r(P_1)-r(P_2)) (r(z))
}
postclose qdata

* diff of means: special codes become missing, then the item is rescaled to 0-100
* using the minimum and maximum over all observations in memory
postfile qdata str32 question clean_mu clean_sd fraud_mu fraud_sd diff diff_se n1 n2 dof pval using "questions_mean.dta", replace
foreach x of varlist `items' {
	di "`x'"
	recode `x' (888888 988888 999999 99 = .)
	summ `x'
	gen `x'_rs = (`x' - r(min))/(r(max) - r(min)) * 100
	ttest `x'_rs if cem_matched == 1, by(fraud) unequal
	post qdata ("`x'") (r(mu_1)) (r(sd_1)) (r(mu_2)) (r(sd_2)) (r(mu_1)-r(mu_2)) (r(se)) (r(N_1)) (r(N_2)) (r(df_t)) (r(p))
}
postclose qdata

* diff of sd: uses the rescaled items created in the previous loop
postfile qdata str32 question sd_clean sd_fraud diff_sd_fstat diff_sd_pval using "questions_sd.dta", replace
foreach x of varlist `items' {
	di "`x'"
	sdtest `x'_rs if cem_matched == 1, by(fraud)
	post qdata ("`x'") (r(sd_1)) (r(sd_2)) (r(F)) (r(p))
}
postclose qdata

* scale of items: maximum of each item after the recode above
postfile qdata str32 question scale using "questions_sc.dta", replace
foreach x of varlist `items' {
	di "`x'"
	qui summ `x'
	post qdata ("`x'") (r(max))
}
postclose qdata

* Assemble one row per item from the four result files, then delete the files.
use questions_mean.dta, clear
foreach part in sd nr sc {
	merge 1:1 question using questions_`part'.dta, nogenerate
}

foreach part in sd mean nr sc {
	erase questions_`part'.dta
}

* Significance flags per item (1 = significant, 0 = not).
* Missing compares as larger than any number in Stata, so every z-based flag
* below excludes missing z statistics explicitly; otherwise items without a
* non-response test would be counted as significant.

* unadjusted tests at the 10 percent level (two-sided)
gen mean_comp_test90 = (pval < .1)
gen var_comp_test90 = (diff_sd_pval < .1)
gen nr_diff_test90 = (abs(diff_nr_zstat) > 1.64485) & !missing(diff_nr_zstat)

** with bonferroni corrections (family size = number of items, _N)
gen mean_comp_test_bonf = (pval < (.05/_N))
gen mean_comp_test_bonf90 = (pval < (.1/_N))
gen var_comp_test_bonf = (diff_sd_pval < (.05/_N))
gen var_comp_test_bonf90 = (diff_sd_pval < (.1/_N))
* two-sided critical values are computed from _N so that they agree with the
* p-value thresholds above instead of relying on hard-coded constants
gen nr_diff_test_bonf = (abs(diff_nr_zstat) > invnormal(1 - (.05/_N)/2)) & !missing(diff_nr_zstat) & !missing(nr_diff)
gen nr_diff_test_bonf90 = (abs(diff_nr_zstat) > invnormal(1 - (.1/_N)/2)) & !missing(diff_nr_zstat) & !missing(nr_diff)

* absolute mean difference expressed in standard deviations of the first group
gen magn_diff_sd = abs(diff) / clean_sd

* Holm (step-down) and Hochberg (step-up) adjustments, alpha = .10, family size _N.
* Items are ranked from most to least significant (rank 1 = smallest p-value or
* largest absolute z); items with a missing statistic are ranked last and are
* never flagged. The threshold at rank i is .10/(_N - i + 1) for both procedures.
*   Holm:     an item is flagged only if it and every better-ranked item pass.
*   Hochberg: an item is flagged if it or any worse-ranked item passes.

* --- difference in means ---
sort pval
gen mean_pval_rank = _n
gen mean_test_holm_alpha = .10 / (_N - mean_pval_rank + 1)
gen mean_holm_nfail = sum(!(pval < mean_test_holm_alpha))
gen mean_comp_test_holm = (mean_holm_nfail == 0)

gen mean_pval_rank_r = _N - mean_pval_rank + 1
gen mean_test_hoch_alpha = .10 / mean_pval_rank_r
sort mean_pval_rank_r
gen mean_hoch_npass = sum(pval < mean_test_hoch_alpha)
gen mean_comp_test_hoch = (mean_hoch_npass > 0) & !missing(pval)

* --- difference in variances (uses diff_sd_pval, not the mean-test p-value) ---
sort diff_sd_pval
gen var_pval_rank = _n
gen var_test_holm_alpha = .10 / (_N - var_pval_rank + 1)
gen var_holm_nfail = sum(!(diff_sd_pval < var_test_holm_alpha))
gen var_comp_test_holm = (var_holm_nfail == 0)

gen var_pval_rank_r = _N - var_pval_rank + 1
gen var_test_hoch_alpha = .10 / var_pval_rank_r
sort var_pval_rank_r
gen var_hoch_npass = sum(diff_sd_pval < var_test_hoch_alpha)
gen var_comp_test_hoch = (var_hoch_npass > 0) & !missing(diff_sd_pval)

* --- item non-response (two-sided z test) ---
gen abs_diff_nr_zstat = abs(diff_nr_zstat)
* sorting on the negated value puts the largest absolute z first and missing last
gen double nr_sort_key = -abs_diff_nr_zstat
sort nr_sort_key
gen nr_z_rank = _n
gen nr_test_holm_alpha = abs(invnormal((.10 / (_N - nr_z_rank + 1))/2))
gen nr_holm_nfail = sum(missing(abs_diff_nr_zstat) | !(abs_diff_nr_zstat > nr_test_holm_alpha))
gen nr_comp_test_holm = (nr_holm_nfail == 0)

gen nr_z_rank_r = _N - nr_z_rank + 1
gen nr_test_hoch_alpha = abs(invnormal((.10 / nr_z_rank_r)/2))
sort nr_z_rank_r
gen nr_hoch_npass = sum(abs_diff_nr_zstat > nr_test_hoch_alpha & !missing(abs_diff_nr_zstat))
gen nr_comp_test_hoch = (nr_hoch_npass > 0) & !missing(abs_diff_nr_zstat)

drop mean_holm_nfail mean_hoch_npass var_holm_nfail var_hoch_npass nr_sort_key nr_holm_nfail nr_hoch_npass


*** TABLE A12 RESULTS ***
** Column 1 Row 1 (Difference in means) range of values
tab1 mean_comp_test_bonf90 mean_comp_test_holm ///
	mean_comp_test_hoch mean_comp_test90
** Column 1 Row 2 (Average magnitude) range of values
* magnitude summarised over the items flagged by each of the four mean tests
foreach flag of varlist mean_comp_test90 mean_comp_test_bonf90 mean_comp_test_holm mean_comp_test_hoch {
	summ magn_diff_sd if `flag' == 1
}
** Column 1 Row 3 (Difference in variances) range of values
tab1 var_comp_test_bonf90 var_comp_test_holm ///
	var_comp_test_hoch var_comp_test90
** Column 1 Row 4 (Item nonresponse) range of values
tab1 nr_diff_test_bonf90 nr_comp_test_holm ///
	nr_comp_test_hoch nr_diff_test90

** Data processing for column 2 (compromised vs clean)
* reload the raw dataset; the in-memory data at this point are the item-level results
use "QAC_2017_12.21.17.dta", clear

* restrict to country code 11 (presumably Peru, per the table title)
drop if pais != 11

* clean_data is 0 for any interview that was cancelled, flagged as fraud, or has
* ls3 == 999999; it is 1 for the remaining non-cancelled interviews; otherwise missing
gen clean_data = cond(cancelled == 1 | ls3 == 999999 | fraud == 1, 0, cond(cancelled == 0, 1, .))

* interviews with ls3 == 999999 are not counted as fraud from here on
replace fraud = 0 if ls3 == 999999

** Demographic recodes: age cohort, survey/upload timestamps, education level.

* q2 with the non-response codes set to missing
gen birth_yr = q2
replace birth_yr = . if inlist(birth_yr, 888888, 988888, 999999)

* interview date and year parsed from the string variable date
gen svy_date = date(date, "MDYhm")
format svy_date %td
gen year = year(svy_date)

* upload timestamp and hour of day parsed from the string variable upload
gen upload_time = Clock(upload, "MDYhm")
format upload_time %tC
gen upload_hour = hhC(upload_time)

* NOTE(review): age_yrs is copied from q2 without using the survey year, so q2
* presumably already holds age in years rather than birth year - confirm
gen age_yrs = birth_yr
gen edad = 1 if age_yrs >= 18 & age_yrs <=25
replace edad = 2 if age_yrs >= 26 & age_yrs <=35
replace edad = 3 if age_yrs >= 36 & age_yrs <=45
replace edad = 4 if age_yrs >= 46 & age_yrs <=55
replace edad = 5 if age_yrs >= 56 & age_yrs <=65
* missing compares as larger than any number in Stata, so exclude it explicitly;
* otherwise respondents with unknown age would be placed in the 66+ cohort
replace edad = 6 if age_yrs >= 66 & !missing(age_yrs)
label var edad "Age Cohort"
label define agegroup 1 "18-25 years old" 2 "26-35 years old" 3 "36-45 years old" 4 "46-55 years old" 5 "56-65 years old" 6 "66+ years old" 
label values edad agegroup

* education level (three bands built from years of education in ed)
gen ed_level = 1 if inlist(ed, 0, 1, 2, 3, 4, 5, 6)
replace ed_level = 2 if inlist(ed, 7, 8, 9, 10, 11)
replace ed_level = 3 if inlist(ed, 12, 13, 14, 15, 16, 17, 18)
label var ed_level "Education Level"
label define edlevel 1 "None or Primary Ed." 2 "Secondary Ed." 3 "University+ Ed."
label values ed_level edlevel
label var ed "Years of Education"
* non-response codes in ed become missing only after ed_level has been built
replace ed = . if inlist(ed, 888888, 988888, 999999)

** Matching of fraud-flagged interviews to non-fraud interviews
* same seed as in the first pass
set seed 339487731

* indicator for interviews whose status is "Canceled" but that are not flagged as fraud
gen canceled_nonfraud = (status == "Canceled" & fraud == 0)

* those interviews are removed before matching
keep if canceled_nonfraud == 0

* exact match on upm1a, q1 and agequota, with fraud as the treatment and the k2k option
cem upm1a (#0) q1 (#0) agequota (#0) if canceled_nonfraud == 0, tr(fraud) k2k

** Build the clean versus compromised samples for the full-sample comparison
* drop interviews that are not clean and were not matched
drop if clean_data == 0 & cem_matched != 1
* 1 = fake, 2 = clean matched, 3 = rest of data
gen comparison_groups = 1 if fraud == 1 & cem_matched == 1
replace comparison_groups = 2 if fraud == 0 & cem_matched == 1
replace comparison_groups = 3 if fraud == 0 & cem_matched == 0

* double the "rest of the dataset" (non-matched clean interviews);
* copy is 0 for original observations and 1 for the duplicates
* (the full variable name is used so this also runs with varabbrev off)
gen exp=1
replace exp=2 if comparison_groups == 3
expand exp, gen(copy)

* compromised versus clean indicator (1 = clean, 2 = compromised)
* every original observation is first set to clean; the second replace then
* moves the fake interviews and the duplicated observations to compromised
gen clean_or_compr = .
replace clean_or_compr = 1 if comparison_groups == 2 | copy == 0
replace clean_or_compr = 2 if comparison_groups == 1 | copy == 1

* labelled 0/1 version of the same indicator
gen clean = 1 if clean_or_compr == 1
replace clean = 0 if clean_or_compr == 2
lab define clean_comp_lab 0 "Compromised" 1 "Clean"
lab values clean clean_comp_lab


* Combine the two versions of each split item into the base variable:
* whenever one version holds 999999, the base variable takes the other version.
foreach stem in dst1b env2b drk1 env1c mil10un mil10a mil10e {
	replace `stem' = `stem'1 if `stem'2 == 999999 & pais != 22
	replace `stem' = `stem'2 if `stem'1 == 999999 & pais != 22
}
* mil10oas follows the same pattern but excludes country code 7 instead of 22,
* and for country code 22 it is taken from mil10oa
replace mil10oas = mil10oas1 if mil10oas2 == 999999 & pais != 7
replace mil10oas = mil10oas2 if mil10oas1 == 999999 & pais != 7
replace mil10oas = mil10oa if pais == 22

* Item-level comparisons between the clean and compromised samples (clean_or_compr).
* Four result files are written, one row per item:
*   questions_nr.dta    share of non-response per group and the prtest z statistic
*   questions_mean.dta  Welch t-test on the item rescaled to 0-100
*   questions_sd.dta    variance-ratio test on the rescaled item
*   questions_sc.dta    maximum observed value of the item

* The item lists are defined once so that the loops cannot drift apart.
* nr_items is the full list minus cp20, ccq2 and ccq4
* (cp20, ccq2, ccq4 do not have any non-response among matched interviews)
local nr_items ls3 soct2 idio2 sgl1 cp6 cp7 cp8 cp13 it1 l1 aoj11 aoj12 ///
b1 b2 b3 b4 b6 b43 b12 b13 b18 b21 b21a b32 b37 b47a m1 m2 sd2new2 sd3new2 sd6new2 ///
infrax infra3 ros1 ros4 ing4 eff1 eff2 media3 media4 pn4 e5 e15 d1 d2 d3 d4 d5 d6 ///
lib1 lib2b lib2c lib4 exc7new pol1 q5a q5b q10e www1 gi0 e16 aoj22new ///
ivv3 exc7 ie3 ie6 ie9 ie10 envp8
local items ls3 soct2 idio2 sgl1 cp6 cp7 cp8 cp13 cp20 it1 l1 aoj11 aoj12 ///
b1 b2 b3 b4 b6 b43 b12 b13 b18 b21 b21a b32 b37 b47a m1 m2 sd2new2 sd3new2 sd6new2 ///
infrax infra3 ros1 ros4 ing4 eff1 eff2 media3 media4 pn4 e5 e15 d1 d2 d3 d4 d5 d6 ///
lib1 lib2b lib2c lib4 exc7new pol1 ccq2 ccq4 q5a q5b q10e www1 gi0 e16 aoj22new ///
ivv3 exc7 ie3 ie6 ie9 ie10 envp8

* a handle left open by an earlier aborted run would make postfile fail
capture postclose qdata

* non-response: 888888, 988888 and 99 count as non-response; 999999 is excluded
postfile qdata str32 question nonresponse_clean nonresponse_fraud nr_diff diff_nr_zstat using "questions_nr.dta", replace
foreach x of varlist `nr_items' {
	di "`x'"
	gen `x'_nr = 1 if inlist(`x', 888888, 988888, 99)
	replace `x'_nr = 0 if `x'_nr == . & `x' != 999999
	replace `x'_nr = . if `x' == 999999
	prtest `x'_nr, by(clean_or_compr)
	post qdata ("`x'") (r(P_1)) (r(P_2)) (r(P_1)-r(P_2)) (r(z))
}
postclose qdata

* diff of means: special codes become missing, then the item is rescaled to 0-100
* using the minimum and maximum over all observations in memory
postfile qdata str32 question clean_mu clean_sd fraud_mu fraud_sd diff diff_se n1 n2 dof pval using "questions_mean.dta", replace
foreach x of varlist `items' {
	di "`x'"
	recode `x' (888888 988888 999999 99 = .)
	summ `x'
	gen `x'_rs = (`x' - r(min))/(r(max) - r(min)) * 100
	ttest `x'_rs, by(clean_or_compr) unequal
	post qdata ("`x'") (r(mu_1)) (r(sd_1)) (r(mu_2)) (r(sd_2)) (r(mu_1)-r(mu_2)) (r(se)) (r(N_1)) (r(N_2)) (r(df_t)) (r(p))
}
postclose qdata

* diff of sd: uses the rescaled items created in the previous loop
postfile qdata str32 question sd_clean sd_fraud diff_sd_fstat diff_sd_pval using "questions_sd.dta", replace
foreach x of varlist `items' {
	di "`x'"
	sdtest `x'_rs, by(clean_or_compr)
	post qdata ("`x'") (r(sd_1)) (r(sd_2)) (r(F)) (r(p))
}
postclose qdata

* scale of items: maximum of each item after the recode above
postfile qdata str32 question scale using "questions_sc.dta", replace
foreach x of varlist `items' {
	di "`x'"
	qui summ `x'
	post qdata ("`x'") (r(max))
}
postclose qdata

* Assemble one row per item from the four result files, then delete the files.
use questions_mean.dta, clear
foreach part in sd nr sc {
	merge 1:1 question using questions_`part'.dta, nogenerate
}

foreach part in sd mean nr sc {
	erase questions_`part'.dta
}

* Significance flags per item (1 = significant, 0 = not).
* Missing compares as larger than any number in Stata, so every z-based flag
* below excludes missing z statistics explicitly; otherwise items without a
* non-response test would be counted as significant.

* unadjusted tests at the 10 percent level (two-sided)
gen mean_comp_test90 = (pval < .1)
gen var_comp_test90 = (diff_sd_pval < .1)
gen nr_diff_test90 = (abs(diff_nr_zstat) > 1.64485) & !missing(diff_nr_zstat)

** with bonferroni corrections (family size = number of items, _N)
gen mean_comp_test_bonf = (pval < (.05/_N))
gen mean_comp_test_bonf90 = (pval < (.1/_N))
gen var_comp_test_bonf = (diff_sd_pval < (.05/_N))
gen var_comp_test_bonf90 = (diff_sd_pval < (.1/_N))
* two-sided critical values are computed from _N so that they agree with the
* p-value thresholds above instead of relying on hard-coded constants
gen nr_diff_test_bonf = (abs(diff_nr_zstat) > invnormal(1 - (.05/_N)/2)) & !missing(diff_nr_zstat) & !missing(nr_diff)
gen nr_diff_test_bonf90 = (abs(diff_nr_zstat) > invnormal(1 - (.1/_N)/2)) & !missing(diff_nr_zstat) & !missing(nr_diff)

* absolute mean difference expressed in standard deviations of the first group
gen magn_diff_sd = abs(diff) / clean_sd

* Holm (step-down) and Hochberg (step-up) adjustments, alpha = .10, family size _N.
* Items are ranked from most to least significant (rank 1 = smallest p-value or
* largest absolute z); items with a missing statistic are ranked last and are
* never flagged. The threshold at rank i is .10/(_N - i + 1) for both procedures.
*   Holm:     an item is flagged only if it and every better-ranked item pass.
*   Hochberg: an item is flagged if it or any worse-ranked item passes.

* --- difference in means ---
sort pval
gen mean_pval_rank = _n
gen mean_test_holm_alpha = .10 / (_N - mean_pval_rank + 1)
gen mean_holm_nfail = sum(!(pval < mean_test_holm_alpha))
gen mean_comp_test_holm = (mean_holm_nfail == 0)

gen mean_pval_rank_r = _N - mean_pval_rank + 1
gen mean_test_hoch_alpha = .10 / mean_pval_rank_r
sort mean_pval_rank_r
gen mean_hoch_npass = sum(pval < mean_test_hoch_alpha)
gen mean_comp_test_hoch = (mean_hoch_npass > 0) & !missing(pval)

* --- difference in variances (uses diff_sd_pval, not the mean-test p-value) ---
sort diff_sd_pval
gen var_pval_rank = _n
gen var_test_holm_alpha = .10 / (_N - var_pval_rank + 1)
gen var_holm_nfail = sum(!(diff_sd_pval < var_test_holm_alpha))
gen var_comp_test_holm = (var_holm_nfail == 0)

gen var_pval_rank_r = _N - var_pval_rank + 1
gen var_test_hoch_alpha = .10 / var_pval_rank_r
sort var_pval_rank_r
gen var_hoch_npass = sum(diff_sd_pval < var_test_hoch_alpha)
gen var_comp_test_hoch = (var_hoch_npass > 0) & !missing(diff_sd_pval)

* --- item non-response (two-sided z test) ---
gen abs_diff_nr_zstat = abs(diff_nr_zstat)
* sorting on the negated value puts the largest absolute z first and missing last
gen double nr_sort_key = -abs_diff_nr_zstat
sort nr_sort_key
gen nr_z_rank = _n
gen nr_test_holm_alpha = abs(invnormal((.10 / (_N - nr_z_rank + 1))/2))
gen nr_holm_nfail = sum(missing(abs_diff_nr_zstat) | !(abs_diff_nr_zstat > nr_test_holm_alpha))
gen nr_comp_test_holm = (nr_holm_nfail == 0)

gen nr_z_rank_r = _N - nr_z_rank + 1
gen nr_test_hoch_alpha = abs(invnormal((.10 / nr_z_rank_r)/2))
sort nr_z_rank_r
gen nr_hoch_npass = sum(abs_diff_nr_zstat > nr_test_hoch_alpha & !missing(abs_diff_nr_zstat))
gen nr_comp_test_hoch = (nr_hoch_npass > 0) & !missing(abs_diff_nr_zstat)

drop mean_holm_nfail mean_hoch_npass var_holm_nfail var_hoch_npass nr_sort_key nr_holm_nfail nr_hoch_npass


*** TABLE A12 RESULTS ***
** Column 2 Row 1 (Difference in means) range of values
tab1 mean_comp_test_bonf90 mean_comp_test_holm ///
	mean_comp_test_hoch mean_comp_test90
** Column 2 Row 2 (Average magnitude) range of values
* magnitude summarised over the items flagged by each of the four mean tests
foreach flag of varlist mean_comp_test90 mean_comp_test_bonf90 mean_comp_test_holm mean_comp_test_hoch {
	summ magn_diff_sd if `flag' == 1
}
** Column 2 Row 3 (Difference in variances) range of values
tab1 var_comp_test_bonf90 var_comp_test_holm ///
	var_comp_test_hoch var_comp_test90
** Column 2 Row 4 (Item nonresponse) range of values
tab1 nr_diff_test_bonf90 nr_comp_test_holm ///
	nr_comp_test_hoch nr_diff_test90
